library(Matrix)
library(scran)
library(Rtsne)
library(irlba)
library(cowplot)
source("/nfs/research1/marioni/jonny/chimera-tal1/scripts/core_functions.R")
load_data()
nPC = 50
In this script, we perform batch correction on our data.
For batch correction, we employ the scran function fastMNN, which performs batch correction in the manner of mnnCorrect, but in the PC-space, and much faster. Critically, this is a composition-aware batch-correction strategy that should not be affected by the lack of e.g. blood cells in the knockout samples. We correct within each timepoint only.
hvgs = getHVGs(sce)
correct = doBatchCorrect(counts = logcounts(sce[hvgs,]),
timepoints = as.character(meta$tomato), #first correct genotypes separately
samples = meta$sample,
timepoint_order = c("FALSE", "TRUE"), #host cells first
sample_order = 1:4 #doesn't matter as pairwise correction
)
corrected = list(all = correct[match(meta$cell, rownames(correct)),])
base = prcomp_irlba(t(logcounts(scater::normalize(sce[hvgs, ]))), n = nPC)$x
saveRDS(corrected, file = "/nfs/research1/marioni/jonny/chimera-tal1/data/corrected_pcas.rds")
saveRDS(base, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/base_pca.rds")
A t-SNE visualisation of our cells, pre- and post-correction, is shown in Figure 1.
tsne_pre = Rtsne(base, pca = FALSE)$Y
tsne_post = Rtsne(corrected$all, pca = FALSE)$Y
ro = sample(nrow(base), nrow(base))
p1 = ggplot(as.data.frame(tsne_pre)[ro,], aes(x = V1, y = V2, col = factor(meta$sample)[ro])) +
geom_point(size = 0.4) +
scale_colour_manual(values = c("3" = "black", "1" = "red", "2" = "coral", "4" = "darkgrey")) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
ggtitle("Pre-correction")
p2 = ggplot(as.data.frame(tsne_post)[ro,], aes(x = V1, y = V2, col = factor(meta$sample)[ro])) +
geom_point(size = 0.4) +
scale_colour_manual(values = c("3" = "black", "1" = "red", "2" = "coral", "4" = "darkgrey")) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
ggtitle("Post-correction")
plot_grid(p1, p2, nrow = 2)
Figure 1: t-SNE of cells before and after correction
Red and coral cells are Tomato+ (injected), black and grey cells are Tomato- (embryo). Coral and grey cells mark the thrid and fourth samples in the E7.5 timepoint.
Figure 2 shows the same plots, but coloured by the mapped celltype (see the mapping script for details). Doublets and stripped nuclei are excluded.
corrected_final = corrected$all[!meta$celltype.mapped %in% c("Stripped", "Doublet"),]
meta_final = meta[!meta$celltype.mapped %in% c("Stripped", "Doublet"),]
tsne_final = Rtsne(corrected_final, pca = FALSE)$Y
ro = sample(nrow(meta_final), nrow(meta_final))
plegend = ggplot(as.data.frame(tsne_final)[ro,], aes(x = V1, y = V2, col = factor(meta_final$celltype.mapped[ro], levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.4) +
scale_colour_manual(values = celltype_colours, name = "") +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
guides(col = guide_legend(override.aes = list(size = 5), ncol = 5))
p1 = ggplot(as.data.frame(tsne_final)[ro,], aes(x = V1, y = V2, col = factor(meta_final$celltype.mapped[ro], levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.4) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank())
plot_grid(p1, get_legend(plegend), ncol = 1)
Figure 2: t-SNE, coloured by celltype
Finally, we generate UMAP coordinates of the batch-corrected data. Doublets and stripped nuclei are excluded. The UMAP is shown in Figure 3.
write.table(corrected_final, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/scanpy_input.tab", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
system("python3 /nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/umap.py")
umap = read.table("/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/scanpy_output.tab", sep = "\t", header = FALSE)
p1 = ggplot(as.data.frame(umap)[ro,], aes(x = V1, y = V2, col = factor(meta_final$celltype.mapped[ro], levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.2) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank())
plot_grid(p1, get_legend(plegend), ncol = 1)
Figure 3: UMAP of chimera cells
ggsave(p1, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/umap.pdf",
width = 5, height = 5)
The same UMAP plotted for Tomato positive and negative cells is shown in Figure 4.
p1 = ggplot(as.data.frame(umap)[meta_final$tomato,][ro,], aes(x = V1, y = V2,
col = factor(meta_final$celltype.mapped[meta_final$tomato][ro],
levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.2) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
lims(x= c(min(umap$V1), max(umap$V1)),
y= c(min(umap$V2), max(umap$V2)))
p2 = ggplot(as.data.frame(umap)[!meta_final$tomato,][ro,], aes(x = V1, y = V2,
col = factor(meta_final$celltype.mapped[!meta_final$tomato][ro],
levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.2) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
lims(x= c(min(umap$V1), max(umap$V1)),
y= c(min(umap$V2), max(umap$V2)))
plot_grid(p1 + ggtitle("Tom+"), p2 + ggtitle("Tom-"), ncol = 2)
Figure 4: UMAP plotted separately for Tomato+ or Tomato- cells
ggsave(p1, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/umap_tom+.pdf",
width = 5, height = 5)
ggsave(p2, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/umap_tom-.pdf",
width = 5, height = 5)
write.table(umap, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/umap.tab", sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
rownames(umap) = meta_final$cell
top = which(!meta_final$celltype.mapped %in% c("Mixed mesoderm", "Notochord"))
bottom = which(meta_final$celltype.mapped %in% c("Mixed mesoderm", "Notochord"))
ro = c(top[sample(length(top), length(top))],
bottom)
umap_manual = umap[ro,]
meta_manual = meta_final[ro,]
meta_manual$X = umap_manual[,1]
meta_manual$Y = umap_manual[,2]
p1 = ggplot(meta_manual[meta_manual$tomato,], aes(x = X, y = Y, col = factor(celltype.mapped, levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.2) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
lims(x= c(min(umap$V1), max(umap$V1)),
y= c(min(umap$V2), max(umap$V2)))
p2 = ggplot(meta_manual[!meta_manual$tomato,], aes(x = X, y = Y, col = factor(celltype.mapped, levels = names(celltype_colours), ordered = TRUE))) +
geom_point(size = 0.2) +
scale_colour_manual(values = celltype_colours) +
theme(legend.position = "none",
axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank()) +
lims(x= c(min(umap$V1), max(umap$V1)),
y= c(min(umap$V2), max(umap$V2)))
ggsave(p1, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/umap_tom+_reordered.pdf",
width = 5, height = 5)
ggsave(p2, file = "/nfs/research1/marioni/jonny/chimera-tal1/scripts/batch_correct/umap_tom-_reordered.pdf",
width = 5, height = 5)
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] biomaRt_2.37.3 scater_1.9.10
## [3] cowplot_0.9.3 ggplot2_3.0.0
## [5] irlba_2.3.2 Rtsne_0.13
## [7] scran_1.9.13 SingleCellExperiment_1.3.9
## [9] SummarizedExperiment_1.11.6 DelayedArray_0.7.21
## [11] matrixStats_0.54.0 Biobase_2.41.2
## [13] GenomicRanges_1.33.11 GenomeInfoDb_1.17.1
## [15] IRanges_2.15.16 S4Vectors_0.19.19
## [17] BiocGenerics_0.27.1 BiocParallel_1.15.8
## [19] Matrix_1.2-14 BiocStyle_2.9.3
## [21] rmarkdown_1.10
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 bit64_0.9-7
## [3] httr_1.3.1 progress_1.2.0
## [5] rprojroot_1.3-2 dynamicTreeCut_1.63-1
## [7] tools_3.5.0 backports_1.1.2
## [9] R6_2.2.2 vipor_0.4.5
## [11] DBI_1.0.0 lazyeval_0.2.1
## [13] colorspace_1.3-2 withr_2.1.2
## [15] prettyunits_1.0.2 tidyselect_0.2.4
## [17] gridExtra_2.3 bit_1.1-14
## [19] compiler_3.5.0 labeling_0.3
## [21] bookdown_0.7 scales_0.5.0
## [23] stringr_1.3.1 digest_0.6.15
## [25] XVector_0.21.3 pkgconfig_2.0.1
## [27] htmltools_0.3.6 highr_0.7
## [29] limma_3.37.3 rlang_0.2.1
## [31] RSQLite_2.1.1 DelayedMatrixStats_1.3.4
## [33] bindr_0.1.1 dplyr_0.7.6
## [35] RCurl_1.95-4.11 magrittr_1.5
## [37] GenomeInfoDbData_1.1.0 Rcpp_0.12.18
## [39] ggbeeswarm_0.6.0 munsell_0.5.0
## [41] Rhdf5lib_1.3.1 viridis_0.5.1
## [43] stringi_1.2.4 yaml_2.2.0
## [45] edgeR_3.23.3 zlibbioc_1.27.0
## [47] rhdf5_2.25.4 plyr_1.8.4
## [49] grid_3.5.0 blob_1.1.1
## [51] crayon_1.3.4 lattice_0.20-35
## [53] hms_0.4.2 locfit_1.5-9.1
## [55] knitr_1.20 pillar_1.3.0
## [57] igraph_1.2.1 rjson_0.2.20
## [59] kmknn_0.99.16 reshape2_1.4.3
## [61] XML_3.98-1.12 glue_1.3.0
## [63] evaluate_0.11 data.table_1.11.4
## [65] gtable_0.2.0 purrr_0.2.5
## [67] assertthat_0.2.0 xfun_0.3
## [69] viridisLite_0.3.0 tibble_1.4.2
## [71] AnnotationDbi_1.43.1 beeswarm_0.2.3
## [73] memoise_1.1.0 tximport_1.9.8
## [75] bindrcpp_0.2.2 statmod_1.4.30